#delimit ;

/* Session set-up.  Every command from here on is terminated by a semicolon. */

/* close a log left open by an earlier run (if any), then start a fresh text log */
capture log close ;
log using t1_boot_breakdown.log , replace text ;

/* start from an empty dataset, never pause for -more-, and fix the random-number seed */
clear ;
set more off ;
set seed 9479826 ;

/* run the helper do-file set_directory_macros (presumably defines the madedat global used below - confirm) */
do set_directory_macros ;

/* runme (takes no arguments): bootstrap illustration with only six clusters.
	Steps, in order:
	  1. draw 6 values of statefip at random from the CPS extract and flag 3 of them as policy = 1
	  2. regress lnwage on policy and controls with default, robust and cluster-robust standard errors
	  3. nonparametric bootstrap that resamples clusters (statefip), with a kernel density of the
	     bootstrapped policy coefficients exported to G6_boot_density.png
	  4. wild cluster bootstrap imposing the null (beta on policy = 0), once with Rademacher weights
	     and once with Webb 6-point weights, with the empirical CDFs of the bootstrapped
	     t-statistics exported to G6_Wild_CDFs.png and G6_Wild_CDFs_ZOOM.png
	Reads CPS_2012_micro from the directory held in the global macro madedat.
	Results depend on the random-number seed in force when the program is called. */
cap prog drop runme ;
prog def runme ;

/* number of bootstrap replications, used by both bootstraps and quoted in the graph notes */
local bsreps = 999 ;

/* generate state policy variable: random order, keep 6 states, first 3 are treated */
use statefip using "${madedat}/CPS_2012_micro" , clear ;
contract statefip ;
generate sort_order = uniform() ;
sort sort_order ;
keep if _n <= 6 ;
gen policy = _n <= 3 ;
sort statefip ;
drop sort_order _freq ;

tempfile statelist ;
qui save `statelist' ;


/* restrict the micro data to the six sampled states and attach the policy dummy */
use "${madedat}/CPS_2012_micro" , clear ;

sort statefip ;
merge m:1  statefip using `statelist' ;
tab _merge ;
keep if _merge == 3 ;
drop _merge ;
tab statefip ;



/* first run the basic regressions for G=6 */
reg lnwage policy age age2 yrseduc  ;
reg lnwage policy age age2 yrseduc  , robust ;
reg lnwage policy age age2 yrseduc  , cluster(statefip) ;
/* keep the cluster-robust estimate and standard error, they define the main t-statistic */
local main_b = _b[policy] ;
local main_se = _se[policy] ;

/* generate restricted residuals, and y-hats, these will be used later in the Wild bootstraps */
/* the hypothesis is: beta on policy = 0.  So to estimate imposing this restriction, we will just drop "policy" from the regression */
reg lnwage age age2 yrseduc ;
predict resid_restricted , resid ;
predict yhat_restricted , xb ;
sort statefip ;
tempfile maindata ;
qui save `maindata' ;



/* next we do a nonparametric bootstrap, show that the resulting distribution of the bootstrapped betas can be non-normal and multimodal */
tempfile bootout ;
reg lnwage policy age age2 yrseduc , vce(bootstrap , cluster(statefip) reps(`bsreps') saving(`bootout') ) ;


drop _all ;
use `bootout' ;

summ ;
summ _b_policy , detail ;
kdensity _b_policy , bw(0.004) n(400) ;
graph export G6_boot_density.png , replace ;


/* next we do a wild bootstrap, Rademacher weights (+-1), and show that the resulting distribution of t-statistics takes only a few values
	(and hence the p-value of the main t-statistic is only interval-identified).  We will do the same with the Webb 6-point distribution, to see
	that the t-stats have a much more "continuous" distribution.  */
/* one row per cluster: the wild weights are drawn at the cluster level from this list */
use `maindata' , clear ;
keep statefip ;
contract statefip ;
drop _freq ;
sort statefip ;
tempfile to_be_sampled ;
qui save `to_be_sampled' ;
list ;


/* t-statistic of the original cluster-robust regression against the null of zero */
local main_t = (`main_b' - 0) / `main_se' ;


cap postclose bs_output ;
tempfile bsout ;
cap erase `bsout' ;
postfile bs_output t_rad_res t_webb_res using `bsout' ;

qui forvalues bb = 1/`bsreps' { ;

	/* for the wild bootstrap */
	/* take the cluster list, generate 2 sets of residual transformations (Rademacher and Webb) */
	/* then merge these back onto main dataset, create transformed residuals and then transformed outcomes */
	/* then estimate the models, and save the t-statistics */

	use statefip using `to_be_sampled' , clear ;
	gen my_uniform = uniform() ;
	gen wild_rademacher = -1 + 2 * (my_uniform >= 0.5) ;
	/* Webb 6-point weights, each with probability 1/6.  The first interval is closed at zero
		because uniform() can return exactly 0, which would otherwise receive a weight of 0 */
	gen wild_webb = 	(-1) * sqrt(1.5) * (my_uniform <= (1/6)) +
						(-1) * sqrt(1) * (my_uniform > (1/6) & my_uniform <= (2/6))  +
						(-1) * sqrt(0.5) * (my_uniform > (2/6) & my_uniform <= (3/6)) +
						(+1) * sqrt(0.5) * (my_uniform > (3/6) & my_uniform <= (4/6)) +
						(+1) * sqrt(1) * (my_uniform > (4/6) & my_uniform <= (5/6))  +
						(+1) * sqrt(1.5) * (my_uniform > (5/6) & my_uniform <= (6/6)) ;

	keep statefip wild_rademacher wild_webb ;
	sort statefip ;

	/* every cluster must match, otherwise the assert option stops the run */
	merge 1:m statefip using `maindata' , assert(match) keep(match) nogenerate ;

	/* create transformed residuals and new wild-outcome-variables */
	gen resid_wild_rad_restricted = resid_restricted * wild_rademacher ;
	gen resid_wild_webb_restricted = resid_restricted * wild_webb ;

	gen y_wild_rademacher_restricted = yhat_restricted + resid_wild_rad_restricted ;
	gen y_wild_webb_restricted = yhat_restricted + resid_wild_webb_restricted ;

	/* now estimate cluster-robust models on each of these two outcomes, generating t-statistics.
		Both outcomes were built from the restricted model, so each t-stat is centered on the
		null hypothesis value of zero */

	reg y_wild_rademacher_restricted policy age age2 yrseduc  , cluster(statefip) ;
	local b_wild_rademacher_restricted = _b[policy] ;
	local se_wild_rademacher_restricted = _se[policy] ;

	reg y_wild_webb_restricted policy age age2 yrseduc  , cluster(statefip) ;
	local b_wild_webb_restricted = _b[policy] ;
	local se_wild_webb_restricted = _se[policy] ;

	/* make the t-stats, store away into a postfile */

	local t_wild_rademacher_restricted  = (`b_wild_rademacher_restricted' - 0)
		/ `se_wild_rademacher_restricted' ;
	local t_wild_webb_restricted  = (`b_wild_webb_restricted' - 0)
		/ `se_wild_webb_restricted' ;

	post bs_output (`t_wild_rademacher_restricted') (`t_wild_webb_restricted') ;

} ;

postclose bs_output ;

/* empirical CDFs: rank of each bootstrapped t-statistic divided by the number of replications */
use `bsout' , clear ;
summ ;
gen one = 1 ;
sort t_rad_res ;
gen Rademacher = sum(one) / _N ;
sort t_webb_res ;
gen Webb = sum(one) / _N ;
qui save `bsout' , replace ;

keep Rademacher t_rad_res ;
rename t_rad_res t_stat ;
tempfile rad ;
sort t_stat ;
qui save `rad' ;

/* stack the Webb and Rademacher CDFs on a common t_stat axis.  Each row carries one of the
	two CDF values and a missing value for the other */
qui use `bsout' , clear ;
keep Webb t_webb_res ;
rename t_webb_res t_stat ;
append using `rad' ;
sort t_stat Rademacher Webb ;
summ ;

graph twoway (line Rademacher t_stat) (line Webb t_stat) , xline(`main_t') ti("CDFs of Bootstrapped t-distributions")
	note("Note: 6 Clusters.  `bsreps' bootstrap replications.  Vertical line at main t-statistic.") ;
graph export G6_Wild_CDFs.png , replace ;

/* same graph restricted to a window of +-0.04 around the main t-statistic */
local lower = `main_t' - 0.04 ;
local upper = `main_t' + 0.04 ;
graph twoway (line Rademacher t_stat if t_stat >= `lower' & t_stat <= `upper') (line Webb t_stat if t_stat >= `lower' & t_stat <= `upper') , xline(`main_t') ti("CDFs of Bootstrapped t-distributions")
	note("Note: 6 Clusters.  `bsreps' bootstrap replications.  Vertical line at main t-statistic.  Zoomed in near main t-statistic.") ;
graph export G6_Wild_CDFs_ZOOM.png , replace ;

end ;

/* execute the analysis program */
runme ;

/* close every log that is still open */
log close _all ;

